########################################################################
########################################################################
# Replication code for:
#
# @article{CharnyshAPSR,
# title={Diversity, Institutions, and Economic Outcomes: Post-WWII Displacement in Poland},
# author={Charnysh, Volha},
# journal={American Political Science Review},
# year={2019},
# }
#
# Comments, questions: charnysh@mit.edu
########################################################################
########################################################################

rm(list=ls())

#Install missing packages
ipak <- function(pkg) {
	new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
	if (length(new.pkg)) 
		install.packages(new.pkg, dependencies = TRUE)
	sapply(pkg, require, character.only = TRUE)
}

packages<-c("plyr","stargazer", "MASS", "CBPS", "ggplot2", "sandwich", "rgdal", "proj4", "maptools", "spdep", "visreg", "lmtest", "xtable", "CBPS", "treatSens", "AER")
ipak(pkg = packages)

         
######################################################
########### Constructing the dataset #################
######################################################

#Change the directory accordingly to load the historic map : 
load("Gminy_data.RData")

gis<-read.csv("MergingGminy.csv") #Data on territorial overlap, computed in ArcGIS
gis2<-merge(gis, map[,c(6,12:22,31)], by.x="Code48", by.y="Code", all.x="TRUE")
head(gis2)

#Calculate share of the total area of 1948 gmina that belongs into a particular contemporary (2013) gmina
#Some numerical variables become characters, so convert back to numeric when necessary.

gis2$areaShare<-gis2$Area48Km2/gis2$AreaKm2
##Recalculate 1948 population data from historical to contemporary gminas based on the overlap
gis2$UrbPop2<-(gis2$Miasto*gis2$Poles)*gis2$areaShare
gis2$Poles2<-gis2$Poles*gis2$areaShare
gis2$fromUSSR2<-gis2$fromUSSR* gis2$areaShare
gis2$fromOtherC2<-as.numeric(gis2$fromOtherC)* gis2$areaShare
gis2$fromCentrP2<-as.numeric(gis2$fromCentrP)* gis2$areaShare
gis2$authocht2<-gis2$authocht* gis2$areaShare
gis2$Men2<-gis2$Men* gis2$areaShare
gis2$TotPop2<-as.numeric(gis2$Total_popu)*gis2$areaShare
gis2$Aged18_59_2<-(as.numeric(gis2$Aged18_59M) +as.numeric(gis2$Aged18_59W))*gis2$areaShare
gis2$Aged60Up2<-(as.numeric(gis2$Aged60Olde) +as.numeric(gis2$Aged60Ol_1))*gis2$areaShare
head(gis2)
summary(gis2$areaShare)


#Add up 1948 population data to the level of 2013 gminas using unique field "Kod_Teryt"
#This dataset includes all intersections, including contemporary municipalities that are split in two by the Oder-Niesse Line.
gis3<-ddply(gis2, .(Kod_Teryt), summarise, TotPop48=sum(TotPop2), Men48=sum(Men2), fromUSSR48=sum(fromUSSR2), fromCP48=sum(fromCentrP2), aut48=sum(authocht2), fromOther48=sum(fromOtherC2), Aged60Up48=sum(Aged60Up2), Aged18_5948=sum(Aged18_59_2), UrbPop48=sum(UrbPop2), Poles48=sum(Poles2))

#This data includes all municipalities fully located in the formerly German Territories.
germdat<-read.csv("covariates1939.csv") #N=630

histdata<-merge(germdat, gis3, by.y="Kod_Teryt", by.x="ID", all.x=TRUE)

t<-read.csv("data19802000.csv")

## Data at the level of Polish gminas circa 2010 (N=630). 
dat<-merge(t, histdata, by="ID", all.y=TRUE) 

##############################################
########### COMPUTING MAIN VARIABLES #########
##############################################

dat$PctUSSR<-dat$fromUSSR48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctCP<-dat$fromCP48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctOther<-dat$fromOther48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctAut<-dat$aut48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)

dat$PctUSSRWithin<-dat$PctUSSR/(1-dat$PctAut)
dat$PctCentrPWithin<-dat$PctCP/(1-dat$PctAut)
dat$PctOtherCWithin<-dat$PctOther/(1-dat$PctAut)

## Key independent variables: diversity of immigration computed using formula (8) in Alesina, Harnoss, Rapoport. 2016. J Econ Growth 21: 101-138. 
dat$divMig<-dat$PctUSSRWithin*(1-dat$PctUSSRWithin)+ dat$PctCentrPWithin*(1-dat$PctCentrPWithin)+ dat$PctOtherCWithin*(1-dat$PctOtherCWithin)
dat$shareMigrants<-1-dat$PctAut

dat$PctUrb1948<-dat$UrbPop48/dat$Poles48
dat$PctMen1948<-dat$Men48/dat$TotPop48
dat$PctOver6048<-dat$Aged60Up48/dat$TotPop48
dat$ShareAged1859<-dat$Aged18_5948/dat$TotPop48

#Variables from 1939: 
dat$SharePrzemysl1939<-dat$LudnoscPrzemysl/dat$LudnoscOgolem
dat$ShareOver100<-dat$GospOver100/dat$GospodRolne 


####################################################################
######################## GMINA BUDGETS IN EARLY 1990 ###############
####################################################################

dat$avgProptax9395<-(dat$PropTax95+ dat$PropTax94+dat$PropTax93)/3
dat$PropTax9395PC<-((dat$NieruchW95+ dat$nier94+ dat$PropW93)/dat$ludn9306)/3
dat$Exp9395pc<-(dat$TotExp93/dat$ludn9306 +dat$Wydatki95/dat$ludn9306 +dat$wyd94/dat$ludn9306)/3
dat$Exp9394pc<-(dat$TotExp93/dat$ludn9306 +dat$wyd94/dat$ludn9306)/2
dat$CapSpPC<-(dat$WydInw95+dat$wyd_inw94+ dat$CapitalSp93)/dat$ludn9306
dat$CapSpPC9394<-(dat$wyd_inw94+ dat$CapitalSp93)/dat$ludn9306

###################################################################
######################## ECONOMIC OUTCOMES IN 1980s ###############
###################################################################

dat$ShareRzemPr82<-dat$WRzemPryw1982/dat$Ludnosc1980
dat$WRzemPrPerThs82<-dat$WRzemPryw1982/(dat$Ludnosc1980/1000)
dat$TVsPerThs1980<-dat$TVs1980/(dat$Ludnosc1980/1000)
dat$TelsPerThs1980<-dat$Phones1980/(dat$Ludnosc1980/1000)
dat$ShareUspPerThs1982<-dat$WGospUspol1982/(dat$Ludnosc1980/1000)
dat$PunktyPerThous1980<-dat$Punkty/(dat$Ludnosc1980/1000)

## Additional variables for g-estimation and appendix 
dat$PtArrived7188<-(dat$Arrived1971_78No+dat$Arrived1981_88No)/dat$Pop1988 
dat$ShareRoln<-dat$GospUspRolnictwo/dat$Ludnosc1980 
dat$ShareWyzsze78<-(dat$WyszeW78+ dat$niepWysze78+ dat$policealne78)/dat$Pop15Above78
dat$ShareWyzsze88<-dat$TertEdu1988/dat$Pop10to191988  
dat$ShareWyzsze02<-(dat$Wyszce02+dat$Policeal02)/dat$Pop2002        
dat$EduDiv88<-1-(dat$ShareWyzsze88^2+(dat$VocEdu1988/dat$Pop10to191988)^2+(dat$SecEdu1988/dat$Pop10to191988)^2+(dat$PrimEdu1988/dat$Pop10to191988)^2)


dat$PtNotFromBirth<-1-dat$FromBirthNo1988/dat$Pop1988
dat$PtArrived7178<-dat$Arrived1971_78No/dat$Pop1988
dat$PtArrived8188<-dat$Arrived1981_88No/dat$Pop1988


dat$LibsPerThous1980<-dat$Biblioteki1982/(dat$Ludnosc1980/1000)
dat$SchoolsPerThous1980<-dat$Szkoly1982/(dat$Ludnosc1980/1000)
dat$WRolnUspRol1982<-dat$GospUspRolnictwo/(dat$Ludnosc1980/1000)
dat$WPrzemUsp1982<-dat$GospUspPrzemysl/(dat$Ludnosc1980/1000)
dat$SrWyrownPts80<-dat$SrodkiWyrownawcze1980/(dat$Ludnosc1980/1000)
dat$DochodyBudzetowePts1980 <-dat$DochodyBudzetowe1980/(dat$Ludnosc1980/1000)
dat$DochodyWlasnePts1980 <-dat$DochodyBudzetoweWlasne1980/(dat$Ludnosc1980/1000)

###################################################################
######################## ECONOMIC OUTCOMES IN 1990s ###############
###################################################################

dat$persIncTax1993pc<-dat$SharePIT93/dat$ludn9306
dat$persIncTax1995pc<-dat$IncPers95/dat$Pop1995
dat$persIncTax1998pc<-dat$IncPers98/dat$Pop1998
dat$persIncTax2000pc<-dat$IncPers00/dat$Pop2000

dat$PodmPryw1995perthous<-dat$PrywAll95/(dat$Pop1995/1000)
dat$PodmPryw1998perthous<-dat$PrywAll98/(dat$Pop1998/1000)
dat$PodmPryw2000perthous<-dat$PrywAll00/(dat$Pop2000/1000)


########################################################################
####  TABLE A1 in the Appendix: DESCRIPTIVE STATITISTICS  ##############
########################################################################

stargazer(dat[,c("divMig", "shareMigrants", "PctUSSR", "PctCP","PctAut", "PctOther","PctMen1948", "ShareAged1859", "TotPop48","PctUrb1948", "SharePrzemysl1939", "ShareOver100",  "distrail48", "DistPow1950", "DistLandBorder", "OSPna10tys1995", "Straz2007","avgProptax9395", "PropTax9395PC","ShareUspPerThs1982","WRzemPrPerThs82", "PunktyPerThous1980", "TVsPerThs1980","TelsPerThs1980",  "Szkoly1982", "Biblioteki1982", "ShareWyzsze78", "ShareWyzsze88", "ShareWyzsze02", "persIncTax1993pc", "persIncTax1995pc", "persIncTax1998pc", "persIncTax2000pc","PodmPryw1995perthous", "PodmPryw1998perthous", "PodmPryw2000perthous")], summary.stat=c("n", "mean","sd",  "min", "max"), digits=2, covariate.labels=c("Migrant Diversity (1948)", "Share Migrants (1948)", "Share from USSR (1948)", "Share from Central Poland (1948)", "Share Autochthonous (1948)" ,  "Share from Europe (1948)","Share Men (1948)", "Share Aged 18-59 (1948)", "Total Population (1948)", "Share Urban (1948)", "Share in Industry (1939)", "Share Farms Over 100 (1939)", "Distance to Railway (1948)", "Distance to County Seat (1950)", "Distance to Border (km)", "Volunteer Fire Brigades per 1,000 (1995-96) ", "Municipal Guard (2007)","Property Tax Rate (1993-95)", "Property Tax Revenue per capita, Zł. (1993-95) ", "Employed in Socialized Economy (1982)","In Private Handicrafts per 1,000 (1982)", "Shops per 1,000 (1980)", "TVs per 1,000 (1980)","Phones per 1,000 (1980)",  "Schools per 1000 (1982)", "Libraries per 1000 (1982)", "Share w/ Higher Edu (1978)", "Share w/ Higher Edu (1988)", "Share w/ Higher Edu (2002)", "Personal Income Tax per capita, Zł. (1993)", "Personal Income Tax per capita, Zł. (1995)",  "Personal Income Tax per capita, Zł. (1998)",  "Personal Income Tax per capita, Zł. (2000)",  "Private Enterprises per 1,000 (1995)", "Private Enterprises per 1000 (1998)",  "Private Enterprises per 1000 (2000)"))


####################################################################################
###########  TABLE A2: Migrant Diversity and Socio-Economic Covariates #############
####################################################################################

corrs<-rbind(cbind(cor.test(dat$divMig, dat$shareMigrants)$estimate, summary(lm(divMig~ shareMigrants, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$PctMen1948)$estimate,summary(lm(divMig~ PctMen1948, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$ShareAged1859)$estimate,summary(lm(divMig~ ShareAged1859, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$PctUrb1948)$estimate,summary(lm(divMig~ PctUrb1948, data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$SharePrzemysl1939)$estimate, summary(lm(divMig~ SharePrzemysl1939, data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$ShareOver100)$estimate, summary(lm(divMig~ ShareOver100, data=dat))$r.squared),
cbind(cor.test(dat$divMig, log(dat$TotPop48))$estimate, summary(lm(divMig~ log(dat$TotPop48), data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$DistPow1950)$estimate, summary(lm(divMig~ DistPow1950, data=dat))$r.squared),
cbind(cor.test(dat$divMig, log(dat$DistPow1950))$estimate, summary(lm(divMig~ log(DistPow1950), data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$distrail48)$estimate, summary(lm(divMig~ distrail48, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$DistLandBorder)$estimate, summary(lm(divMig~ DistLandBorder, data=dat))$r.squared),
cbind(cor.test(dat$divMig, log(dat$DistLandBorder))$estimate, summary(lm(divMig~ log(DistLandBorder), data=dat))$r.squared))
corrs2<-as.data.frame(corrs, col.names=c("Variable","Pearson Correlation", "Propotion of explained variance (R^2)"), row.names=c("Share Migrants (1948)", "Share Male (1948)", "Share Aged 18-59 (1948)", "Share Urban (1948)","Share in Industry (1939)", "Share Farms over 100 ha (1939)", "Ln population (1948)", "Distance to county seat (1950)", "Ln Distance to County Seat (1950)", "Distance to Raiway (1948)", "Distance to Border", "Ln Distance to Border"))
names(corrs2)<-c("Pearson Correlation", "Propotion of explained variance (R^2)")

xtable(corrs2, digits=2)


####################################################################
####  Table 2: Public goods Provision  and Fiscal Capacity   #######
####################################################################

ospF<-lm(OSPna10tys1995 ~divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +DistPow1950+ DistLandBorder + distrail48+log(TotPop48)+ GermanRegion, data=dat)
summary(ospF)

strazF<-glm(Straz2007 ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat,family=binomial(link='logit')) 
summary(strazF) 

strazOSP<-glm(Straz2007 ~ OSPna10tys1995 +PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat,family=binomial(link='logit')) 
summary(strazOSP) 

propR9395F<-lm(avgProptax9395 ~  divMig+ shareMigrants+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+DistPow1950 +log(TotPop48)+ SharePrzemysl1939+GermanRegion, data=dat) 
summary(propR9395F) 

PropROSP<-lm(avgProptax9395  ~ OSPna10tys1995 +PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanRegion, data=dat)
summary(PropROSP)

propA9395F<-lm(log(PropTax9395PC) ~divMig+ shareMigrants+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanRegion, data=dat)
summary(propA9395F)

PropAOSP<-lm(log(PropTax9395PC) ~ OSPna10tys1995 +PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanRegion, data=dat)
summary(PropAOSP)

stargazer(ospF, strazF, strazOSP, propR9395F, PropROSP, propA9395F, PropAOSP, style = 'APSR', digits=2, omit=c("GermanRegion", "Constant", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "DistPow1950", "DistLandBorder", "distrail48", "TotPop48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), column.labels=c("Volunteer Fire Brigades", "Municipal Guard", "Property Tax Rate", "ln(Property Tax Revenue)"), column.separate=c(1, 2, 2, 2), dep.var.labels.include=FALSE, star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

###Figure 4: Predicted Probability of Establishing a Municipal Guard based on models 2 & 3 in Table 2
par(mfrow=c(1,2))
visreg(strazF, "divMig",scale = "response", xlab="Migrant diversity", ylab="Probability of a municipal guard", fill.par=list(density = 30, angle = 45, col="gray"), line.par=list(col="black"))
visreg(strazOSP, "OSPna10tys1995", scale = "response",  xlab="Volunteer Fire Brigades per 10,000", ylab="Probability of a municipal guard", fill.par=list(density = 30, angle = 45, col="gray"), line.par=list(col="black"))


###############################################################################
################## Table 3: Economic activity in the 1980s  ###################
###############################################################################

uspC<-lm(ShareUspPerThs1982~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
summary(uspC)

rzemC<-lm(WRzemPrPerThs82~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
summary(rzemC)

punktyC<-lm(log(PunktyPerThous1980) ~ divMig+ shareMigrants+ PctUrb1948 +SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

telC<-lm(log(TelsPerThs1980) ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950 +DistLandBorder + distrail48+ GermanRegion, data=dat) 
summary(telC) 

tvC<-lm(log(TVsPerThs1980)~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
summary(tvC)

stargazer(uspC, rzemC, punktyC, telC, tvC, digits=2, style="APSR",  omit=c("GermanRegion", "Constant", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "DistPow1950", "DistLandBorder", "distrail48", "TotPop48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F) 

###########################################################################################
###### Figure 5: Gross Income and GDP in Provinces affected by Population Transfers #######
###########################################################################################
gdpdat<-read.csv("woj49aggr.csv")

GDP<-lm(PKBpc95Zl~ divMig, data= gdpdat)

GrInc<-lm(GrossPrInc_pc_95 ~ divMig, data= gdpdat)

par(mfrow=c(1,2))
visreg(GDP, "divMig", ylab="GDP per capita (1995)", xlab="Migrant Diversity (1948)", fill.par=list(density = 30, angle = 45, col="gray"), line.par=list(col="black"))
visreg(GrInc, "divMig", ylab="Gross income per capita (1995)", xlab="Migrant Diversity (1948)", fill.par=list(density = 30, angle = 45, col="gray"), line.par=list(col="black"), ADD=TRUE)


##############################################################################################
#### Table A.9 in the Appendix: Economic Outcomes in Models without Spatial Filtering  #######
##############################################################################################

inc1995pcNM4<-lm(log(persIncTax1995pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
inc1998pcNM4<-lm(log(persIncTax1998pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
inc2000pcNM4<-lm(log(persIncTax2000pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
ent1995pcNM4<-lm(log(PodmPryw1995perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
ent1998pcNM4<-lm(log(PodmPryw1998perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)
ent2000pcNM4<-lm(log(PodmPryw2000perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

stargazer(inc1995pcNM4, inc1998pcNM4, inc2000pcNM4, ent1995pcNM4, ent1998pcNM4, ent2000pcNM4, digits=2, style = 'APSR', dep.var.labels=c("1995","1998", "2000", "1995", "1998", "2000"), omit=c("GermanRegion", "Constant"),  omit.stat=c("rsq","ser","f"), column.labels=c("Ln(Personal Income Tax per capita)", "Ln(Private Enterprises per 1,000)"), column.separate=c(3,3), add.lines=list(c("District FE", "Yes", "Yes", "Yes","Yes", "Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


#######################################################################
###### Table 4 in the Article : Economic activity in the 1990s ########
#######################################################################

######## Implementing Spatial Filtering  

#SpatialFiltering function selects eigenvectors in a semi-parametric spatial filtering approach to removing spatial dependence from linear models. Selection is by brute force by finding the single eigenvector reducing the standard variate of Moran's I for regression residuals most, and continuing until no candidate eigenvector reduces the value by more than tol. It returns a summary table from the selection process and a matrix of selected eigenvectors for the specified model. 

#To expedite the process on a Mac, switch to faster vecLib library  via Terminal :
 #cd /Library/Frameworks/R.framework/Resources/lib
#ln -sf /System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Versions/Current/libBLAS.dylib libRblas.dylib 
 
load("contempgm.RData") #Change the directory accordingly
proj4string(shape)<- CRS("+proj=tmerc +lat_0=0 +lon_0=19 +k=0.9993 +x_0=500000 +y_0=-5300000 +ellps=GRS80 +units=m +no_defs")
datNoNA<-dat[which(!is.na(dat$SharePrzemysl1939) & !is.na(dat$divMig)),] #Exclude NAs for spatialfiltering to work properly. Note: 1939 census excludes Danzig/Gdansk
shape1<-merge(shape[which(shape$Kod_Teryt %in% datNoNA$Kod_Teryt),c(1,5)], datNoNA, by="Kod_Teryt", all.x=TRUE) 

xy <- coordinates(shape1) #This gets coordinates of centers of polygons
W_nb <- poly2nb(shape1, queen = T)
##visualize queen neighbors
plot(shape1, col="gray", border="blue")
plot(W_nb, xy, add=TRUE, pch=20)
#Compute spatial weights:
W_cont_matW <- nb2listw(W_nb, style = "W", zero.policy = T)

## Income per capita in 1995

inc1995pc<-lm(log(persIncTax1995pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data= shape1)
summary(inc1995pc)
res95inc <- inc1995pc$residuals
moran.test(res95inc, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.45 indicates considerable spatial autocorrelation and the need for spatial filtering.

old <- Sys.time() 
eigInc95<- SpatialFiltering(log(persIncTax1995pc) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time() #- old # calculate difference
old-new  #Time difference of 4.27 min

inc1995pcF<-lm(log(persIncTax1995pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigInc95), data= shape1)
summary(inc1995pcF)

## Income per capita in 1998

inc1998pc<-lm(log(persIncTax1998pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data= shape1)
summary(inc1998pc)
res98inc <- inc1998pc$residuals
moran.test(res98inc, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.25 indicates spatial autocorrelation and the need for spatial filtering.
old <- Sys.time() 
eigInc98<- SpatialFiltering(log(persIncTax1998pc) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time() #- old # calculate difference
old-new #2.39 mins

inc1998pcF<-lm(log(persIncTax1998pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigInc98), data= shape1)
summary(inc1998pcF)

## Income per capita in 2000

inc2000pc<-lm(log(persIncTax2000pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data= shape1)
summary(inc2000pc)
res00inc <- inc2000pc$residuals
moran.test(res00inc, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.17 indicates mild spatial autocorrelation
old <- Sys.time() 
eigInc00<- SpatialFiltering(log(persIncTax2000pc) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time() 
old-new #Takes 2.47 min

inc2000pcF<-lm(log(persIncTax2000pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigInc00), data= shape1)
summary(inc2000pcF)

## Income per capita in 1993 (recalculate spatial weights bc of additional NAs in the DV)
inc1993pc<-lm(log(persIncTax1993pc) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data= shape1[!is.na(shape1$persIncTax1993pc),])
summary(inc1993pc)
res93inc <- inc1993pc$residuals
moran.test(res93inc, listw=nb2listw(poly2nb(shape1[!is.na(shape1$persIncTax1993pc),], queen = T), style = "W", zero.policy = T), zero.policy=T)
#Moran I statistic of 0.01 indicates no spatial autocorrelation and no need for spatial filtering

#Private enterprises in 1995
ent1995pc<-lm(log(PodmPryw1995perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=shape1)
summary(ent1995pc)   
res95ent <- ent1995pc$residuals
moran.test(res95ent, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.30 indicates modest spatial autocorrelation and the need for spatial filtering.

old <- Sys.time() 
eigEnt95<- SpatialFiltering(log(PodmPryw1995perthous) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time() #- old # calculate difference
old-new  #Time difference of 3.66 min.
ent1995pcF<-lm(log(PodmPryw1995perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigEnt95), data= shape1)
summary(ent1995pcF)

ent1998pc<-lm(log(PodmPryw1998perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=shape1)
summary(ent1998pc)
res98ent <- ent1998pc$residuals
moran.test(res98ent, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.23 indicates mild spatial autocorrelation

old <- Sys.time() 
eigEnt98<- SpatialFiltering(log(PodmPryw1998perthous) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time() #- old # calculate difference
old-new  #Time difference of 3.22 min.
ent1998pcF<-lm(log(PodmPryw1998perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigEnt98), data= shape1)
summary(ent1998pcF)   

ent2000pc<-lm(log(PodmPryw2000perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=shape1)
summary(ent2000pc)
res00ent <- ent2000pc$residuals
moran.test(res00ent, listw=W_cont_matW, zero.policy=T)
#Moran I statistic of 0.25 indicates mild spatial autocorrelation

old <- Sys.time() 
eigEnt00<- SpatialFiltering(log(PodmPryw2000perthous) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data = shape1, nb = W_nb, style="W", tol=0.2, ExactEV = T, zero.policy = T) #
new <- Sys.time()    
old-new #2.96 min.
ent2000pcF<-lm(log(PodmPryw2000perthous) ~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion+fitted(eigEnt00), data=shape1)
summary(ent2000pcF)

##Table 4 in the article 
stargazer(inc1993pc, inc1995pcF, inc1998pcF, inc2000pcF, ent1995pcF, ent1998pcF, ent2000pcF, style="APSR", digits=2, omit=c("GermanRegion", "fitted", "Constant"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), c("Moran Eigenvectors", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), column.labels=c("1993", "1995", "1998", "2000","1995","1998","2000" ), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


###################################################################
########### Exploring Mechanisms using  G estimation.  ############
###################################################################
### Note: g-estimation analysis and sensitity code is adapted from Acharya, Avidit, Matthew Blackwell, and Maya Sen. 2016. “Explaining Causal Findings Without Bias: Detecting and Assessing Direct Effects.” American Political Science Review 110 (3): 512- 529. 

### Function for g estimation variance (Acharya, Blackwell, and Sen 2016)

seq.g.var <- function(mod.first, mod.direct, med.vars) {
  require(sandwich)
  mat.x <- model.matrix(mod.direct)
  mat.first <- model.matrix(mod.first)
  n <- nrow(mat.x)
  Fhat <- crossprod(mat.x, mat.first)/n
  Fhat[, !(colnames(mat.first) %in% med.vars)] <- 0
  Mhat.inv <- solve(crossprod(mat.first)/n)
  ghat <- t(estfun(mod.direct)) + Fhat %*% Mhat.inv %*% t(estfun(mod.first))
  meat <- crossprod(t(ghat))/n
  bread <- (t(mat.x)%*%mat.x)/n
  vv <- (n/(n-ncol(mat.x)))*(solve(bread) %*% meat %*% solve(bread))/n
  return(vv)
}

#Model with intermediate covariates

firstOSPEnt<-lm(log(PodmPryw1998perthous) ~  OSPna10tys1995 + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR+divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data=shape1)

#Model without intermediate covariates
directOSPEnt <- lm(I(log(PodmPryw1998perthous) - coef(firstOSPEnt)["OSPna10tys1995"]* OSPna10tys1995) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data = as.data.frame(shape1), subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstOSPEnt)))
directOSPEnt.vcov <- seq.g.var(firstOSPEnt, directOSPEnt, "OSPna10tys1995")
coeftest(directOSPEnt, vcov = directOSPEnt.vcov)
OSPEnt.se <- sqrt(diag(directOSPEnt.vcov))


###Alternative mediator: property tax amount per capita

firstTaxEnt<-lm(log(PodmPryw1998perthous) ~  log(PropTax9395PC) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR +divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data=shape1)

directTaxEnt <- lm(I(log(PodmPryw1998perthous) - coef(firstTaxEnt)["log(PropTax9395PC)"]* log(PropTax9395PC)) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data = shape1, subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstTaxEnt)))

directTaxEnt.vcov <- seq.g.var(firstTaxEnt, directTaxEnt, "log(PropTax9395PC)")
coeftest(directTaxEnt, vcov = directTaxEnt.vcov)
TaxEnt.se <- sqrt(diag(directTaxEnt.vcov))

##### Income and Volunteer Fire Brigades as a Mediator  
firstOSPInc<-lm(log(persIncTax1998pc) ~  OSPna10tys1995 + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR+ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion+fitted(eigInc98), data= shape1)  
directOSPInc <- lm(I(log(persIncTax1998pc) - coef(firstOSPInc)["OSPna10tys1995"]* OSPna10tys1995) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion +fitted(eigInc98), data = shape1, subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstOSPInc)))

directOSPInc.vcov <- seq.g.var(firstOSPInc, directOSPInc, "OSPna10tys1995")
coeftest(directOSPInc, vcov = directOSPInc.vcov)
OSPInc.se <- sqrt(diag(directOSPInc.vcov))

#Income and Property Tax Amounts as mediator
firstTaxInc<-lm(log(persIncTax1998pc) ~  log(PropTax9395PC) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR+ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100 +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion+fitted(eigInc98), data= shape1)

directTaxInc <- lm(I(log(persIncTax1998pc) - coef(firstTaxInc)["log(PropTax9395PC)"]* log(PropTax9395PC)) ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion+fitted(eigInc98), data = shape1, subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstTaxInc)))

directTaxInc.vcov <- seq.g.var(firstTaxInc, directTaxInc, "log(PropTax9395PC)")
coeftest(directTaxInc, vcov = directTaxInc.vcov)   
TaxInc.se <- sqrt(diag(directTaxInc.vcov))


########################################################################
### Figure 6 : Coefficient on Migrant Diversity and 95% CIs ############
########################################################################
par(mfrow=c(1,2))
plot(x = NA, xlim = c(-0.0075,0.4), ylim = c(5,10), bty = "n", yaxt = "n", ylab = "", xlab = "Coefficient on Migrant Diversity", main="Private enterprises")
abline(v = 0, col = "grey", lty = 2)
grid(nx = NULL, ny = NULL, col = "lightgray", lty = "dotted")
text(x = coef(ent1998pcF)["divMig"], y = 9.5, "Baseline estimate")
points(x = coef(ent1998pcF)["divMig"], y = 10, pch = 19)
segments(x0 = confint(ent1998pcF)["divMig",1], x1 = confint(ent1998pcF)["divMig",2], y0 = 10)
text(x = coef(directOSPEnt)["divMig"], y = 7.5, "Direct effect, net fire brigades")
points(x = coef(directOSPEnt)["divMig"], y = 8, pch = 19)
segments(x0 = coef(directOSPEnt)["divMig"] - 2*OSPEnt.se["divMig"], x1 = coef(directOSPEnt)["divMig"] + 2*OSPEnt.se["divMig"], y0 = 8)
text(x = coef(directTaxEnt)["divMig"], y = 5.5, "Direct effect, net tax revenues")
points(x = coef(directTaxEnt)["divMig"], y = 6, pch = 19)
segments(x0 = coef(directTaxEnt)["divMig"] - 2*TaxEnt.se["divMig"], x1 = coef(directTaxEnt)["divMig"] + 2*TaxEnt.se["divMig"], y0 = 6)

plot(x = NA, xlim = c(-0.0075,0.4), ylim = c(5,10), bty = "n", yaxt = "n", ylab = "", xlab = "Coefficient on Migrant Diversity", main="Personal income tax")
abline(v = 0, col = "grey", lty = 2)
grid(nx = NULL, ny = NULL, col = "lightgray", lty = "dotted")
text(x = coef(inc1998pcF)["divMig"], y = 9.5, "Baseline estimate")
points(x = coef(inc1998pcF)["divMig"], y = 10, pch = 19)
segments(x0 = confint(inc1998pcF)["divMig",1], x1 = confint(inc1998pcF)["divMig",2], y0 = 10)
text(x = coef(directOSPInc)["divMig"], y = 7.5, "Direct effect, net fire brigades")
points(x = coef(directOSPInc)["divMig"], y = 8, pch = 19)
segments(x0 = coef(directOSPInc)["divMig"] - 2*OSPInc.se["divMig"], x1 = coef(directOSPInc)["divMig"] + 2*OSPInc.se["divMig"], y0 = 8)
text(x = coef(directTaxInc)["divMig"], y = 5.5, "Direct effect, net tax revenues")
points(x = coef(directTaxInc)["divMig"], y = 6, pch = 19)
segments(x0 = coef(directTaxInc)["divMig"] - 2*TaxInc.se["divMig"], x1 = coef(directTaxInc)["divMig"] + 2*TaxInc.se["divMig"], y0 = 6)


###################################################
######### Table A16 in the Online Appendix  #######
###################################################

stargazer(firstOSPEnt, directOSPEnt, firstTaxEnt, directTaxEnt, firstOSPInc, directOSPInc, firstTaxInc, directTaxInc, se=list(NULL, OSPEnt.se[2], NULL, TaxEnt.se[2], NULL, OSPInc.se[2], NULL, TaxInc.se[2]),digits=2, style = 'APSR', dep.var.labels.include=FALSE, column.labels=c("ln(Enterprises per 1000 people)", "ln(Personal Income Tax)"), column.separate=c(4, 4), omit=c("eigInc98", "eigEnt98","GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939", "PGR", "ShareWysze78", "PtArrived7188", "ShareUspPerThs1982", "TVsPerThs1980", "WRzemPrPerThs82", "TelsPerThs1980", "Szkoly1982", "PunktyPerThous1980"),  omit.stat=c("rsq","ser","f"), add.lines=list(c("Intermediate covariates", "Yes","No","Yes","No","Yes","No","Yes","No"), c("Pre-Treatment Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes", "Yes"), c("District Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Ye"), c("Moran Eigenvectors", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

###############################################################
######### Sensitivity Analysis Figures for the Appendix #######
###############################################################

##Perform sensitivity analysis to bias due to unmeasured intermediate confounders. The sequential unconfoundedness assumption implies that ρ = 0. By varying ρ, we can vary the severity of the un- measured confounding for the effect of Mi. 

rho <- seq(-0.9,0.9, by = 0.05)
acde.sens <- rep(NA, times = length(rho))
acde.sens.se <- rep(NA, times = length(rho))
res.y <- residuals(lm(log(PodmPryw1998perthous) ~log(PunktyPerThous1980) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR+divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data = as.data.frame(shape1), subset = !is.na(OSPna10tys1995))) 
res.m <- residuals(lm(OSPna10tys1995 ~  log(PunktyPerThous1980) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 +PGR+ divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data = as.data.frame(shape1))) #predicting mediator from first
rho.tilde <- cor(res.y, res.m)
m.fixed <- coef(firstOSPEnt)["OSPna10tys1995"] - rho*sd(res.y)*sqrt((1-rho.tilde^2)/(1-rho^2))/sd(res.m)
n <- nrow(model.matrix(directOSPEnt))
for(i in 1:length(rho)) {
  sens.direct  <- lm(I(log(PodmPryw1998perthous) - m.fixed[i]* OSPna10tys1995) ~divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigEnt98), data = as.data.frame(shape1), subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstOSPEnt)))
  acde.sens[i] <- coef(sens.direct)["divMig"]
  sens.vcov <- seq.g.var(firstOSPEnt, sens.direct, "OSPna10tys1995")
  acde.sens.se[i] <- sqrt(sens.vcov["divMig", "divMig"])
}
ci.hi <- acde.sens + 1.96 * acde.sens.se
ci.lo <- acde.sens - 1.96 * acde.sens.se


##Sensitivity analysis for income 
rho2 <- seq(-0.9,0.9, by = 0.05)
acde.sens2 <- rep(NA, times = length(rho2))
acde.sens.se2 <- rep(NA, times = length(rho2))
res.y2 <- residuals(lm(log(persIncTax1998pc) ~  log(PunktyPerThous1980) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR + divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigInc98), data = as.data.frame(shape1), subset = !is.na(OSPna10tys1995))) 
res.m2 <- residuals(lm(OSPna10tys1995 ~  log(PunktyPerThous1980) + PunktyPerThous1980 +TelsPerThs1980 + TVsPerThs1980+Szkoly1982 +WRzemPrPerThs82 + ShareUspPerThs1982 + PGR+divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigInc98), data = as.data.frame(shape1))) #predicting mediator from first
rho.tilde2 <- cor(res.y2, res.m2)
m.fixed2 <- coef(firstOSPEnt)["OSPna10tys1990s"] - rho2*sd(res.y2)*sqrt((1-rho.tilde2^2)/(1-rho2^2))/sd(res.m2)
n <- nrow(model.matrix(directOSPEnt))
for(i in 1:length(rho2)) {
  sens.direct  <- lm(I(log(persIncTax1998pc) - m.fixed[i]* OSPna10tys1995) ~divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder + distrail48+ GermanRegion +fitted(eigInc98), data = as.data.frame(shape1), subset = rownames(as.data.frame(shape1)) %in% rownames(model.matrix(firstOSPEnt)))
  acde.sens2[i] <- coef(sens.direct)["divMig"]
  sens.vcov2 <- seq.g.var(firstOSPEnt, sens.direct, "OSPna10tys1995")
  acde.sens.se2[i] <- sqrt(sens.vcov2["divMig", "divMig"])
}

ci.hi2 <- acde.sens2 + 1.96 * acde.sens.se2
ci.lo2 <- acde.sens2 - 1.96 * acde.sens.se2
############################################
######## Figure A11 in the Appendix.  ######
############################################

par(mfrow=c(1,2))
plot(rho, acde.sens, type = 'n', ylim = range(c(ci.hi,ci.lo)), xlab = bquote("Correlation between mediator and outcome errors" ~~ (rho)),
     ylab = "Estimated ACDE, net volunteer fire brigades", bty = "n", las = 1, main="Private Enterprises")
polygon(x = c(rho, rev(rho)), y = c(ci.lo, rev(ci.hi)), col = "grey70", border = NA)
lines(rho, acde.sens, lwd = 2)
abline(v=0, lty = 2)
abline(h=0, lty = 2)
plot(rho2, acde.sens2, type = 'n', ylim = range(c(ci.hi2,ci.lo2)), xlab = bquote("Correlation between mediator and outcome errors" ~~ (rho)),
     ylab = "Estimated ACDE, net volunteer fire brigades", bty = "n", las = 1, main="Personal Income Tax")
polygon(x = c(rho2, rev(rho2)), y = c(ci.lo2, rev(ci.hi2)), col = "grey70", border = NA)
lines(rho2, acde.sens2, lwd = 2)
abline(v=0, lty = 2)
abline(h=0, lty = 2)


##############################################################################
###### Additional analyses and robustness checks included in the Appendix ####
##############################################################################

####################################################
#### Table A.5: Models omitting Share Migrants #####
####################################################

ospNM1<-lm(OSPna10tys1995 ~ divMig, data=dat) 
ospNM2<-lm(OSPna10tys1995 ~ divMig+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat) 
strazNM1<-glm(Straz2007 ~ divMig, data=dat,family=binomial(link='logit')) 
strazNM2<-glm(Straz2007 ~ divMig+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat,family=binomial(link='logit')) 
propTaxpcNM1<-lm(log(PropTax9395PC) ~ divMig, data=dat) 
propTaxpcNM2<-lm(log(PropTax9395PC) ~ divMig+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat) 

stargazer(ospNM1, ospNM2, strazNM1, strazNM2, propTaxpcNM1, propTaxpcNM2, digits=2, style = 'APSR', dep.var.labels.include=FALSE, omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"),  omit.stat=c("rsq","ser","f"), column.labels=c("Volunteer Fire Brigades", "Municipal Guard", "ln(Property Tax Revenue)"), column.separate=c(2,2, 2), add.lines=list(c("Covariates", "No", "Yes", "No", "Yes", "No", "Yes"), c("District FE", "No", "Yes", "No", "Yes", "No", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#########################################################################
#### Table A.7: Models omitting Share Migrants and/or no covariates #####
#########################################################################

punkty1980NM1<-lm(log(PunktyPerThous1980) ~ divMig, data=dat) 
punkty1980NM2<-lm(log(PunktyPerThous1980) ~ divMig+ shareMigrants, data=dat) 
punkty1980NM3<-lm(log(PunktyPerThous1980) ~ divMig+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat) 
tels1980NM1<-lm(log(TelsPerThs1980) ~ divMig, data=dat)
tels1980NM2<-lm(log(TelsPerThs1980) ~ divMig+ shareMigrants, data=dat) 
tels1980NM3<-lm(log(TelsPerThs1980) ~ divMig+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48 +GermanRegion, data=dat) 

stargazer(punkty1980NM1, punkty1980NM2, punkty1980NM3, tels1980NM1, tels1980NM2, tels1980NM3,  digits=2, style = 'APSR', dep.var.labels.include=FALSE, omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"),  omit.stat=c("rsq","ser","f"), column.labels=c("Ln(Telephones per 1000)", "Ln(Shops per 1000)"), column.separate=c(3,3), add.lines=list(c("Covariates", "No", "No","Yes", "No", "No", "Yes"), c("District FE", "No", "No", "Yes", "No", "No", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#########################################################################
#### Table A.8: Models omitting Share Migrants and/or no covariates #####
#########################################################################

###Contemporary economic indicators:
inc1995pcNM1<-lm(log(persIncTax1995pc) ~  divMig, data=dat)
inc1995pcNM2<-lm(log(persIncTax1995pc) ~  divMig+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

inc1998pcNM1<-lm(log(persIncTax1998pc) ~  divMig, data=dat)
inc1998pcNM2<-lm(log(persIncTax1998pc) ~  divMig+  PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

inc2000pcNM1<-lm(log(persIncTax2000pc) ~  divMig, data=dat)
inc2000pcNM2<-lm(log(persIncTax2000pc) ~  divMig+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

ent1995pcNM1<-lm(log(PodmPryw1995perthous) ~  divMig, data=dat)
ent1995pcNM2<-lm(log(PodmPryw1995perthous) ~  divMig+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

ent1998pcNM1<-lm(log(PodmPryw1998perthous) ~  divMig, data=dat)
ent1998pcNM2<-lm(log(PodmPryw1998perthous) ~  divMig+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

ent2000pcNM1<-lm(log(PodmPryw2000perthous) ~  divMig, data=dat)
ent2000pcNM2<-lm(log(PodmPryw2000perthous) ~  divMig+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

stargazer(inc1995pcNM1, inc1995pcNM2, inc1998pcNM1, inc1998pcNM2, ent1995pcNM1, ent1995pcNM2, ent1998pcNM1, ent1998pcNM2,  digits=2, style = 'APSR', dep.var.labels.include=FALSE, omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"),  omit.stat=c("rsq","ser","f"), column.labels=c("Ln(Personal Income Tax per capita)", "Ln(Private Enterprises per 1,000"), column.separate=c(4,4), add.lines=list(c("1995","1995", "1998", "1998", "1995","1995", "1998", "1998" ),c("Covariates", "No", "Yes", "No","Yes",  "No", "Yes", "No","Yes"), c("District FE", "No", "Yes", "No","Yes", "No", "Yes", "No","Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#########################################################################
#### Table A.8: Models with Migrant Diversity and Share Migrants #####
#########################################################################

inc1995pcNM3<-lm(log(persIncTax1995pc) ~  divMig+ shareMigrants, data=dat)
inc1998pcNM3<-lm(log(persIncTax1998pc) ~  divMig+ shareMigrants, data=dat)
inc2000pcNM3<-lm(log(persIncTax2000pc) ~  divMig+ shareMigrants, data=dat)
ent1995pcNM3<-lm(log(PodmPryw1995perthous) ~  divMig+ shareMigrants, data=dat)
ent1998pcNM3<-lm(log(PodmPryw1998perthous) ~  divMig+ shareMigrants, data=dat)
ent2000pcNM3<-lm(log(PodmPryw2000perthous) ~  divMig+ shareMigrants, data=dat)

stargazer(inc1995pcNM3, inc1998pcNM3, inc2000pcNM3, ent1995pcNM3, ent1998pcNM3, ent2000pcNM3, digits=2, style = 'APSR', dep.var.labels.include=FALSE, omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"),  omit.stat=c("rsq","ser","f"), column.labels=c("Ln(Personal Income Tax per capita)", "Ln(Private Enterprises per 1,000)"), column.separate=c(3,3), add.lines=list(c("Year","1995","1998", "2000", "1995", "1998", "2000"), c("Covariates", "No", "No", "No","No", "No", "No")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


######################################################
####### Table A.10: Models excluding Cities     #####
######################################################

inc1995nocities<-lm(log(persIncTax1995pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])
inc1998nocities<-lm(log(persIncTax1998pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])
inc2000nocities<-lm(log(persIncTax2000pc) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])
ent1995nocities<-lm(log(PodmPryw1995perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])
ent1998nocities<-lm(log(PodmPryw1998perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])
ent2000nocities<-lm(log(PodmPryw2000perthous) ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat[dat$PctUrb1948<0.85,])

stargazer(inc1995nocities, inc1998nocities, inc2000nocities, ent1995nocities, ent1998nocities, ent2000nocities, digits=2, style = 'APSR', dep.var.labels=c("1995","1998", "2000", "1995", "1998", "2000"), omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"), omit.stat=c("rsq","ser","f"), column.labels=c("Ln(Personal Income Tax per capita)", "Ln(Private Enterprises per 1,000"), column.separate=c(3,3), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

###########################################################
#### Table A.11: Models with Intermediate Variables   #####
###########################################################

income1<-lm(log(persIncTax2000pc) ~  OSPna10tys1995 + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

enterp1<-lm(log(PodmPryw2000perthous) ~  OSPna10tys1995 + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

income2<-lm(log(persIncTax2000pc) ~  avgProptax9395 + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

enterp2<-lm(log(PodmPryw2000perthous) ~  avgProptax9395 + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

income3<-lm(log(persIncTax2000pc) ~  log(PropTax9395PC) + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

enterp3<-lm(log(PodmPryw2000perthous) ~  log(PropTax9395PC) + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanRegion, data=dat)

stargazer(income1, income2, income3, enterp1, enterp2, enterp3, digits=2, style = 'APSR', omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"), omit.stat=c("rsq","ser","f"), dep.var.labels.include=FALSE, column.labels=c("ln(Personal Income Tax per capita)", "ln(Private Enterprises per 1,000)"), column.separate=c(3,3), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#################################################################
#### Table A.12: Taxes and Public Goods Spending in 1993-95 #####
#################################################################

spendInvT<-lm(log(CapSpPC)~log(PropTax9395PC), data=dat)
spendInvT2<-lm(log(CapSpPC)~log(PropTax9395PC)+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanRegion, data=dat)
spendInvR<-lm(log(CapSpPC)~ avgProptax9395, data=dat)
spendInvR2<-lm(log(CapSpPC)~ avgProptax9395 +PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanRegion, data=dat)
stargazer(spendInvT, spendInvT2, spendInvR, spendInvR2, style = 'APSR', digits=2, omit=c("GermanRegion", "Constant","DistLandBorder", "distrail48", "DistPow1950","TotPop48", "ShareOver100", "PctUrb1948", "SharePrzemysl1939"), omit.stat=c("rsq","ser","f"), dep.var.labels.include=FALSE, column.labels=c("ln(Capital Spending per capita)"), column.separate=c(4), add.lines=list(c("Covariates", "No", "Yes", "No","Yes"), c("District FE", "No", "Yes", "No","Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

############################################################################
################## Nonparametric CBGPS weighting (Section D3) ##############
############################################################################

##Remove NAs before performing the analysis
dat1<-dat[which(!is.na(dat$divMig) & !is.na(dat$SharePrzemysl1939)),]

#Assumption 1: treatment is normally distributed. Does not hold here: moderate negative skewness
plot(density(scale(dat1$divMig)))
hist(dat1$divMig)
shapiro.test(dat1$divMig)  #Shapiro test rejects normality but getting it closer to normal doesn't work so well
max.norm.cor<-cor(qqnorm(dat1$divMig)$x,qqnorm(dat1$divMig)$y)

###Need to transform the variable 
k <- max(dat1$divMig) + 0.01
dat1$divMigTr <- -1 * sqrt(k - dat1$divMig) 
## why do I multiply by -1? This is because the transformation
## shifts the orientations i.e., more is less, but -1 returns
## the meaning. The correlation between 
## the transformed and original is almost 1
shapiro.test(dat1$divMigTr)  #Shapiro test rejects normality but getting it closer to normal doesn't work so well
max.norm.cor<-cor(qqnorm(dat1$divMigTr)$x,qqnorm(dat1$divMigTr)$y)

cor(dat1$divMigTr, dat1$divMig)
plot(density(dat1$divMigTr))

#Fit npCBPS without transformation
nfe.pscore.form <- (divMig ~ shareMigrants+ PctMen1948 + ShareAged1859+PctUrb1948 +SharePrzemysl1939 +ShareOver100 +log(TotPop48) + DistPow1950+ DistLandBorder+ distrail48 + I(shareMigrants^2)+ I(PctMen1948^2)  + I(ShareAged1859^2)+I(PctUrb1948^2) +I(SharePrzemysl1939^2) +I(ShareOver100^2)+I(log(TotPop48)^2) + I(DistPow1950^2) +  I(DistLandBorder^2)  + I(distrail48^2)) 
pscorefit.nfe.np<-npCBPS(nfe.pscore.form, data = dat1, corprior=0.01/nrow(dat1))  
plot(pscorefit.nfe.np) #  

# Define the treatment model and the covariates (and second moments)
# Fit npCBPS with transformation
nfe.pscore.formTr <- (divMigTr ~ shareMigrants+ PctMen1948 + ShareAged1859+PctUrb1948 +SharePrzemysl1939 +ShareOver100 +log(TotPop48) + DistPow1950+ DistLandBorder+ distrail48 + I(shareMigrants^2)+ I(PctMen1948^2)  + I(ShareAged1859^2)+I(PctUrb1948^2) +I(SharePrzemysl1939^2) +I(ShareOver100^2)+I(log(TotPop48)^2) + I(DistPow1950^2) +  I(DistLandBorder^2)  + I(distrail48^2)) 
pscorefit.nfe.npTr<-npCBPS(nfe.pscore.formTr, data = dat1, corprior=0.01/nrow(dat1))  
plot(pscorefit.nfe.npTr) #   

#############################################
### Balance Table A.13 for the Appendix #####
#############################################

balance<-data.frame(before=balance(pscorefit.nfe.np)$unweighted, beforeTr=balance(pscorefit.nfe.npTr)$unweighted, afterTr = balance(pscorefit.nfe.npTr)$balanced)
colnames(balance)<-c("Unweighted, no transf.", "Unweighted, transf.", "Weighted, transf.")
rownames(balance)<-c("Share Migrants (1948)", "Share Men (1948)", "Share Aged 18-59","Share Urban (19428)", "Share in Industry (1939)", "Share Farms over 100 ha (1939)", "ln(Population) (1948)", "Distance to County Seat (1950)", "Distance to Border", "Distance to Railway (1948)", "Share Migrants ^2", "Share men ^ 2", "Share aged 18-59 ^2","Share Urban ^2","Share in Industry ^2","Share Farms over 100 ha ^2", "ln(Population)^2 ", "Distance to County Seat ^2", "Distance to Border ^2", "Distance to Railway ^2")

xtable(balance, digits = 2)

#####################################
### Table A.14 for the Appendix #####
#####################################

####  Estimate treatment effects (treatment var on original scale)
RegFireBr<-lm(OSPna10tys1995 ~ divMig+ GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)  
RegTaxShare<-lm(avgProptax9395 ~divMig+ GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)
RegTax<-lm(log(PropTax9395PC)~divMig+ GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)
RegShops<-lm(log(PunktyPerThous1980) ~divMig+GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)
RegPhones<-lm(log(TelsPerThs1980)~divMig+ GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)


stargazer(RegFireBr,RegTaxShare, RegTax,RegShops, RegPhones,  style="APSR", digits=2,  omit=c("GermanRegion", "Constant"), omit.stat=c("rsq","ser","f"), dep.var.labels=c("Fire Brigades","Tax Share", "ln(Tax Revenue)" ,"ln(Shops)", "ln(Phones)"), add.lines=list(c("District FE", "Yes", "Yes", "Yes","Yes", "Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#####################################
### Table A.15 for the Appendix #####
#####################################
## Post-Communist Economic Outcomes
regInc95<-lm(log(persIncTax1995pc) ~  divMig + GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)
regInc98<-lm(log(persIncTax1998pc) ~  divMig +  GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)
regInc00<-lm(log(persIncTax2000pc) ~  divMig + GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1) 
regEnt95<-lm(log(PodmPryw1995perthous) ~  divMig+ GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1)  
regEnt98<-lm(log(PodmPryw1998perthous) ~  divMig + GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1) 
regEnt00<-lm(log(PodmPryw2000perthous) ~  divMig + GermanRegion, weights = pscorefit.nfe.npTr$weights, data = dat1) 

stargazer(regInc95, regInc98, regInc00, regEnt95, regEnt98, regEnt00, style="APSR", digits=2, omit=c("GermanRegion", "Constant"), omit.stat=c("rsq","ser","f"), add.lines=list(c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=FALSE, column.labels=c("ln(Personal Income Tax per capita)", "ln(Private Enterprises per 1,000"), column.separate=c(3,3), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

#################################################################
####  SENSITIVITY ANALYSES FOR MAIN OUTCOMES (Appendix D.4) #####
#################################################################
library(treatSens) 
#The package only supports Gaussian (normal) response function 
#Step 1: standardize all continuous covariates and the outcome to have mean zero and sd of one:
#Step 2: negative covariates need to be multiplied by -1, so that estimated coefficients are positive.
#Note: treatSens takes 10-15 min to run.
dat1<-dat[which(!is.na(dat$divMig) & !is.na(dat$SharePrzemysl1939)),] #Remove NAs

OSPfin<-treatSens(OSPna10tys1995 ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48,  data=dat1, standardize=TRUE) 


dat1$TelsPerThs1980Ln<-log(dat1$TelsPerThs1980)
phonesSens<-treatSens(TelsPerThs1980Ln ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4)   

dat1$PunktyPerThous1980Ln<-log(dat1$PunktyPerThous1980)
shopsSens<-treatSens(PunktyPerThous1980Ln ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4) 

dat1$PropTax9395PCLog<-log(dat1$PropTax9395PC)
tax9395sens<-treatSens(PropTax9395PCLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4) 

dat1$persIncTax1995pcLog<-log(dat1$persIncTax1995pc)
Inc95sens<-treatSens(persIncTax1995pcLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4) 

dat1$persIncTax1998pcLog<-log(dat1$persIncTax1998pc)
Inc98sens<-treatSens(persIncTax1998pcLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48,  data=dat1, standardize=TRUE, core=4) #

dat1$persIncTax2000pcLog<-log(dat1$persIncTax2000pc)
Inc00sens<-treatSens(persIncTax2000pcLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4) #

#Enterprises
dat1$PodmPryw1995perthousLog<-log(dat1$PodmPryw1995perthous)
ent95sens<-treatSens(PodmPryw1995perthousLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4)   

dat1$PodmPryw1998perthousLog<-log(dat1$PodmPryw1998perthous)
ent98sens<-treatSens(PodmPryw1998perthousLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4)#

dat1$PodmPryw2000perthousLog<-log(dat1$PodmPryw2000perthous)
ent00sens<-treatSens(PodmPryw2000perthousLog ~divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48, data=dat1, standardize=TRUE, core=4) #,  

##Plot results
sensPlot(OSPfin) #Add labels to covariates: txtlab=TRUE. Covariate closest to lines is PctUrb1948 and distance to border
title(main="DV: Volunteer Fire Brigades")

sensPlot(tax9395sens) #Share Industry is the most powerful covariate
title(main="DV: Property Tax Revenue, 1993-95")

sensPlot(phonesSens) #Most covariate is shareUrban are powerful enough to change the relationship
title(main="DV: Phones per 1000, 1982")
 
sensPlot(shopsSens) #powerful covariates: distance to land border, Share urban, distance to county center.
title(main="DV: Shops per 1000, 1982")
 
sensPlot(Inc95sens) #Share resettled is the most powerful covariate
title(main="DV: Personal Income Tax, 1995")

sensPlot(Inc98sens)  #Share Przemysl is most powerful
title(main="DV: Personal Income Tax, 1998")
 
sensPlot(Inc00sens) #Share Przemysl is most powerful
title(main="DV: Personal Income Tax, 2000")

sensPlot(ent95sens) 
title(main="DV: Private Enterprises, 1995") #Share Przemysl is between the lines and share migr is close.

sensPlot(ent98sens)
title(main="DV: Private Enterprises, 1998")  #Share Przemysl is between the lines and share migr is close.

sensPlot(ent00sens) 
title(main="DV: Private Enterprises, 2000") 

#####################################################################################
####  D.6 INSTRUMENTING FOR RELIANCE ON VOLUNTEER FIRE BRIGADES WITH DIVERSITY  #####
#####################################################################################

library(AER)
library(lmtest)
library(systemfit)
library(ivmodel)
library(ivpack) 

###Using heterogeneity as an instrument for reliance on formal vs. informal institutions

first = lm(OSPna10tys1995 ~divMig + shareMigrants +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data = dat)
summary(first, diagnostics=TRUE) 

reducedInc = lm(log(persIncTax2000pc) ~ OSPna10tys1995 + shareMigrants +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data = dat)
summary(reducedInc, diagnostics=TRUE) 

reducedEnt = lm(log(PodmPryw2000perthous) ~ OSPna10tys1995 + shareMigrants +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data = dat)
summary(reducedEnt, diagnostics=TRUE) 

tslsInc = ivreg(log(persIncTax2000pc) ~ OSPna10tys1995 + shareMigrants +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion | divMig +shareMigrants +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) +DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data = dat)
summary(tslsInc, diagnostics=TRUE) 

tslsEnt = ivreg(log(PodmPryw2000perthous) ~ OSPna10tys1995  + shareMigrants +PctUrb1948+ SharePrzemysl1939 + ShareOver100 +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 + GermanRegion | divMig  + shareMigrants +PctUrb1948+ SharePrzemysl1939  + ShareOver100 +log(TotPop48) +DistPow1950 +DistLandBorder+ distrail48 + GermanRegion, data = dat)
summary(tslsEnt, diagnostics=TRUE) 

### Table A19: 
stargazer(first, reducedInc, reducedEnt, tslsInc, tslsEnt, style="APSR", digits=2, omit=c("GermanRegion"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes", "Yes", "Yes","Yes")), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

##########################################################################
####  ALTERNATIVE EXPLANATIONS: Table A20-21. Cultural differences    ####
##########################################################################

### Volunteer Fire Brigades
OSPussr1<-lm(OSPna10tys1995 ~ PctUSSR + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat) 
summary(OSPussr1)  
    
OSPcp1<-lm(OSPna10tys1995 ~ PctCP  +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat) 
summary(OSPcp1) 

OSPother1<-lm(OSPna10tys1995 ~ PctOther +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat) 
summary(OSPother1) 

#### Municipal Guard: 
strazUSSR<-glm(Straz2007 ~ PctUSSR + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat,family=binomial(link='logit'))#
summary(strazUSSR)
   
strazCP<-glm(Straz2007 ~ PctCP  +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat,family=binomial(link='logit')) #GermanRegion
summary(strazCP) 
  
strazOther<-glm(Straz2007 ~ PctOther +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat,family=binomial(link='logit')) #GermanRegion
summary(strazOther) 

## Tax Revenuews: nothing is significant
taxrevUSSR<-lm(log(PropTax9395PC) ~ PctUSSR + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(taxrevUSSR)
taxrevCP<-lm(log(PropTax9395PC) ~ PctCP + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(taxrevCP)

taxrevOther<-lm(log(PropTax9395PC) ~ PctOther + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(taxrevOther)

stargazer(OSPussr1, OSPcp1, OSPother1, strazUSSR, strazCP, strazOther,  taxrevUSSR, taxrevCP, taxrevOther, style="APSR", digits=2,  omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes","Yes", "Yes","Yes")), dep.var.labels.include=FALSE, column.labels=c("Volunteer Fire Brigades", "Municipal Guard", "ln(Property Tax Revenue)"), column.separate=c(3,3,3), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


## Postcommunist economic outcomes
inc95USSR<-lm(log(persIncTax1995pc) ~ PctUSSR + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
inc95CP<-lm(log(persIncTax1995pc) ~ PctCP + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc95CP)
inc95Other<-lm(log(persIncTax1995pc) ~ PctOther + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc95Other)
ent95USSR<-lm(log(PodmPryw1995perthous) ~ PctUSSR + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent95USSR)
ent95CP<-lm(log(PodmPryw1995perthous) ~ PctCP + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent95CP)
ent95Other<-lm(log(PodmPryw1995perthous) ~ PctOther + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent95Other)

stargazer(inc95USSR, inc95CP, inc95Other,  ent95USSR, ent95CP, ent95Other,  style="APSR", digits=2, omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=FALSE, column.labels=c("ln(Personal Income Tax per capita, 1995)", "ln(Private Enterprises per 1000, 1995)"), column.separate=c(3,3), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


###############################################################################
############ Table A22: Diversity using four groups + group dummies ###########
###############################################################################

dat$DivTot<-1-(dat$PctUSSR^2+dat$PctCP^2+dat$PctOther^2+dat$PctAut^2)
dat$shareLargest<-rep(NA, nrow(dat))
for (i in 1:nrow(dat)){
	dat$shareLargest[i]<-max(dat$PctUSSR[i], dat$PctCP[i], dat$PctOther[i], dat$PctAut[i])
}

### Factor for which group is largest
dat$whichLargest<-rep(NA, nrow(dat))
dat$whichLargest[dat$shareLargest==dat$PctUSSR]<-"USSR"
dat$whichLargest[dat$shareLargest==dat$PctCP]<-"CP"
dat$whichLargest[dat$shareLargest==dat$PctOther]<-"Other"
dat$whichLargest[dat$shareLargest==dat$PctAut]<-"Aut"
dat$whichLargest<-relevel(as.factor(dat$whichLargest), ref="Other")


inc95AltDiv<-lm(log(persIncTax1995pc) ~ DivTot  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc95AltDiv)

ent95AltDiv<-lm(log(PodmPryw1995perthous) ~ DivTot + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent95AltDiv)

inc98AltDiv<-lm(log(persIncTax1998pc) ~ DivTot  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc98AltDiv)

ent98AltDiv<-lm(log(PodmPryw1998perthous) ~ DivTot + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent98AltDiv)

inc00AltDiv<-lm(log(persIncTax2000pc) ~ DivTot  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc00AltDiv)

ent00AltDiv<-lm(log(PodmPryw2000perthous) ~ DivTot + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent00AltDiv)

stargazer(inc95AltDiv, inc98AltDiv, inc00AltDiv, ent95AltDiv, ent98AltDiv, ent00AltDiv,  style="APSR", digits=2,  omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("ln(Personal Income Tax per capita)", "ln(Private Enterprises per 1000)"), column.separate=c(3,3), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


###############################################################################
############ Table A23: Diversity using Herfindahl w/ four groups  ############
###############################################################################


inc95AltDiv2<-lm(log(persIncTax1995pc) ~ DivTot +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc95AltDiv2)

ent95AltDiv2<-lm(log(PodmPryw1995perthous) ~ DivTot + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent95AltDiv2)

inc98AltDiv2<-lm(log(persIncTax1998pc) ~ DivTot +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc98AltDiv2)

ent98AltDiv2<-lm(log(PodmPryw1998perthous) ~ DivTot + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent98AltDiv2)

inc00AltDiv2<-lm(log(persIncTax2000pc) ~ DivTot  +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(inc00AltDiv2)

ent00AltDiv2<-lm(log(PodmPryw2000perthous) ~ DivTot + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanRegion, data=dat)
summary(ent00AltDiv2)

stargazer(inc95AltDiv2, inc98AltDiv2, inc00AltDiv2, ent95AltDiv2, ent98AltDiv2, ent00AltDiv2,  style="APSR", digits=2,  omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("ln(Personal Income Tax per capita)", "ln(Private Enterprises per 1000)"), column.separate=c(3,3), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

##########################################################################
############ Table A24: Human Capital: Education in 1978, 1988, 2002  ####
##########################################################################
dat$ShareWyzsze78<-(dat$WyszeW78+ dat$niepWysze78+ dat$policealne78)/dat$Pop15Above78
dat$ShareWyzsze88<-dat$TertEdu1988/dat$Pop10to191988  
dat$EduDiv88<-1-(dat$ShareWyzsze88^2+(dat$VocEdu1988/dat$Pop10to191988)^2+(dat$SecEdu1988/dat$Pop10to191988)^2+(dat$PrimEdu1988/dat$Pop10to191988)^2)

edu78a<-lm(ShareWyzsze78 ~  divMig, data=dat)
summary(edu78a)  
edu78b<-lm(ShareWyzsze78 ~  divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(edu78b) 
 
edu88a<-lm(ShareWyzsze88 ~  divMig, data=dat)
summary(edu88a) 

edu88b<-lm(ShareWyzsze88 ~  divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(edu88b) 

edu88c<-lm(EduDiv88 ~  divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(edu88c) 

edu02a<-lm(ShareWyzsze02 ~   divMig, data=dat)
summary(edu02a) 

edu02b<-lm(ShareWyzsze02 ~   divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(edu02b) 

incedu95a<-lm(log(persIncTax1995pc) ~ ShareWyzsze78 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(incedu95a) #

incedu95b<-lm(log(persIncTax1995pc) ~ ShareWyzsze88 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(incedu95b) # 

incedu95c<-lm(log(persIncTax1995pc) ~ EduDiv88 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(incedu95c) # 

entedu95a<-lm(log(PodmPryw1995perthous) ~ ShareWyzsze78 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(entedu95a) #

entedu95b<-lm(log(PodmPryw1995perthous) ~ ShareWyzsze88 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(entedu95b)  

entedu95c<-lm(log(PodmPryw1995perthous) ~ EduDiv88 +divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)  
summary(entedu95c) 

## Combine key results in a latex table: 
stargazer(edu78b, edu88b, edu02b, edu88c, incedu95b, incedu95c, entedu95b, entedu95c,  style="APSR", digits=2, omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("Share w/ Higher Edu","Edu Diversity", "ln(Personal Income Tax", "ln(Private Enterprises)"), column.separate=c(3,1,2,2), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

################################################################
############ Tables A25-26: Occupational Heterogeneity   #######
################################################################

occ<-read.csv("data/AllOccupations1950.csv")
head(occ)
##Occupational heterogeneity: 
occ$PctInd<-occ$Przemysl/(occ$Ogolem-occ$UnknOcc)
occ$PctBud<-occ$Budownictwo/(occ$Ogolem-occ$UnknOcc)
occ$PctRoln<-occ$Rolnictwo/(occ$Ogolem-occ$UnknOcc)
occ$PctLesn<-occ$Lesnictwo/(occ$Ogolem-occ$UnknOcc)
occ$PctKomm<-occ$Komunikacja/(occ$Ogolem-occ$UnknOcc)
occ$PctTrade<-occ$Trade/(occ$Ogolem-occ$UnknOcc)
occ$PctGospKom<-occ$GospKom/(occ$Ogolem-occ$UnknOcc)
occ$PctSci<-occ$Nauka/(occ$Ogolem-occ$UnknOcc)
occ$PctHealth<-occ$Health/(occ$Ogolem-occ$UnknOcc)
occ$PctAdm<-occ$AdminBank/(occ$Ogolem-occ$UnknOcc)
occ$PctOther<-occ$OtherOcc/(occ$Ogolem-occ$UnknOcc)

occ$OccFrac50<-1-(occ$PctInd^2+occ$PctBud^2+occ$PctRoln^2+occ$PctLesn^2+ occ$PctKomm^2+occ$PctTrade^2+occ$PctGospKom^2+occ$PctSci^2+occ$PctHealth^2+ occ$PctAdm^2+occ$PctOther^2)
dat2<-merge(dat, occ[,c(1,38)], by="NoPow1950", all.X=TRUE)

#Aggegate data to county level: 
datP<-ddply(dat2, .(NoPow1950), summarise, divMig=mean(divMig, na.rm=TRUE), shareMigrants=mean(shareMigrants, na.rm=TRUE), PctUrb1948=mean(PctUrb1948, na.rm=TRUE), SharePrzemysl1939=mean(SharePrzemysl1939, na.rm=TRUE), distrail48=mean(distrail48, na.rm=TRUE), PodmPryw1995perthous=mean(PodmPryw1995perthous, na.rm=TRUE), PrywAll95=sum(PrywAll95, na.rm=TRUE), IncPers95=sum(IncPers95, na.rm=TRUE), Pop1995=sum(Pop1995, na.rm=TRUE), persIncTax1995pc=mean(persIncTax1995pc, na.rm=TRUE), OccFrac50=mean(OccFrac50, na.rm=TRUE), TotPop48=sum(TotPop48, na.rm=TRUE), DistLandBorder=mean(DistLandBorder, na.rm=TRUE)) 
dim(datP)


occdiv0<-lm(OccFrac50 ~ divMig+ shareMigrants, data= datP) 
summary(occdiv0) 

occdiv1<-lm(OccFrac50 ~ divMig+ shareMigrants + distrail48, data= datP) 
summary(occdiv1) 

occdiv2<-lm(OccFrac50 ~ divMig+ shareMigrants +SharePrzemysl1939, data= datP) 
summary(occdiv2) 

occdiv3<-lm(OccFrac50 ~ divMig+ shareMigrants +SharePrzemysl1939+ PctUrb1948+ distrail48+log(TotPop48)+ DistLandBorder, data= datP) 
summary(occdiv3) 

######################################################################################
###### Table A25: Diversity in 1948 and Occupational Heterogeneity in 1950 counties. #
######################################################################################
stargazer(occdiv0, occdiv1, occdiv2, occdiv3, style="APSR", digits=2, omit=c("Constant"), omit.stat=c("rsq","ser","f"), dep.var.labels.include=TRUE, column.labels=c("Occupational Heterogeneity"), column.separate=4, star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

##### Replicate main analysis with OccFrac50 in the model

inc1995pcO<-lm(log(persIncTax1995pc) ~  OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data= dat2)
summary(inc1995pcO) #  

inc1998pcO<-lm(log(persIncTax1998pc) ~  OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat2)
summary(inc1998pcO) #

inc2000pcO<-lm(log(persIncTax2000pc) ~  OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat2)
summary(inc2000pcO) #

regPriv95_O<-lm(log(PodmPryw1995perthous) ~ OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat2) 
summary(regPriv95_O)

regPriv98_O<-lm(log(PodmPryw1998perthous) ~ OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat2) 
summary(regPriv98_O)

regPriv00_O<-lm(log(PodmPryw2000perthous) ~ OccFrac50 +divMig+ shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat2) 
summary(regPriv00_O)

###############################################################################
#### Table A26: OLS Regression w/ Posttreatment Occupational Heterogeneity ####
###############################################################################

stargazer(inc1995pcO, inc1998pcO, inc2000pcO, regPriv95_O, regPriv98_O, regPriv00_O, digits=2, style="APSR", omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("ln(Personal Income Tax", "ln(Private Enterprises)"), column.separate=c(3,3), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

##############################################################
############ F.4 State Policyduring Communist Period  ########
##############################################################


dat$LibsPerThous1980<-dat$Biblioteki1982/(dat$Ludnosc1980/1000)
dat$SchoolsPerThous1980<-dat$Szkoly1982/(dat$Ludnosc1980/1000)
dat$WRolnUspRol1982<-dat$GospUspRolnictwo/(dat$Ludnosc1980/1000)
dat$WPrzemUsp1982<-dat$GospUspPrzemysl/(dat$Ludnosc1980/1000)
dat$SrWyrownPts80<-dat$SrodkiWyrownawcze1980/(dat$Ludnosc1980/1000)
dat$DochodyBudzetowePts1980 <-dat$DochodyBudzetowe1980/(dat$Ludnosc1980/1000)
dat$DochodyWlasnePts1980 <-dat$DochodyBudzetoweWlasne1980/(dat$Ludnosc1980/1000)

##Need municipal budget

libs1<-lm(LibsPerThous1980 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(libs1)#not sig.

sch1<-lm(SchoolsPerThous1980 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(sch1)# 

usp1<-lm(ShareUspPerThs1982 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(usp1)

uspRln82<-lm(WRolnUspRol1982 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(uspRln82)

uspPrzem82<-lm(WPrzemUsp1982 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(uspPrzem82)
## Total 
dochody80<-lm(DochodyBudzetowePts1980 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(dochody80)
## Wlasne dochody
dochodyWl80<-lm(DochodyWlasnePts1980 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(dochodyWl80)
## Srodki Wyrownawcze
wyrown80<-lm(SrWyrownPts80 ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(wyrown80)

stargazer(libs1, sch1, uspRln82, uspPrzem82, dochodyWl80, wyrown80, style="APSR", digits=2, omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("District FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("Libraries", "Schools", "Agriculture", "Industry", "Municipal income", "Compensatory measures"), column.separate=c(4,2,2), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


#################################################
############ F.5 Sorting : Table A28 ############
#################################################

dat$PtNotFromBirth<-1-dat$FromBirthNo1988/dat$TotPop1988
dat$PtArrived7178<-dat$Arrived1971_78No/dat$TotPop1988
dat$PtArrived8188<-dat$Arrived1981_88No/dat$TotPop1988

sort1a<-lm(PtNotFromBirth ~divMig, data=dat)
summary(sort1a)  

sort1b<-lm(PtNotFromBirth ~divMig+ shareMigrants+ SharePrzemysl1939, data=dat)
summary(sort1b) 

sort1c<-lm(PtNotFromBirth ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(sort1c)

sort4<-lm(PtArrived7178 ~divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(sort4) 
sort5<-lm(PtArrived8188 ~divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanRegion, data=dat)
summary(sort5)

stargazer(sort1a, sort1b, sort1c, sort4, sort5,style="APSR", digits=2, omit=c("GermanRegion", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "ShareOver100", "distrail48"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Other Covariates", "No", "No", "Yes","Yes", "Yes","Yes"), c("District FE", "No", "No", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, column.labels=c("Not Living from Birth", "Arrived (1970s)", "Arrived (1980s)"), column.separate=c(3,1,1), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


##################################################################################
##################  APPENDIX: ANALYSIS of CRIME DATA, Counties in 1961-1962 ######
##################################################################################

pr<-read.csv("Appendix_Crime.csv")
head(pr)

#### Constructing main variables
pr$Pop<-pr$autochtpop1950+ pr$fromCentrPoland1950+ pr$fromUSSR1950+ pr$fromAbroad1950
pr$ShareAut1950<-pr$autochtpop1950/pr$Pop
pr$ShareCP1950<-pr$fromCentrPoland1950/pr$Pop
pr$ShareUSSR1950<-pr$fromUSSR1950/pr$Pop
pr$ShareOther1950<-pr$fromAbroad1950/pr$Pop

pr$PctUSSRWithin<-pr$ShareUSSR1950/(1-pr$ShareAut1950)
pr$PctCentrPWithin<-pr$ShareCP1950/(1-pr$ShareAut1950)
pr$PctOtherCWithin<-pr$ShareOther1950/(1-pr$ShareAut1950)
pr$shareResettled<-(pr$fromCentrPoland1950+ pr$fromUSSR1950+ pr$fromAbroad1950)/pr$Pop

pr$divMig<-pr$PctUSSRWithin*(1-pr$PctUSSRWithin)+ pr$PctCentrPWithin*(1-pr$PctCentrPWithin)+ pr$PctOtherCWithin*(1-pr$PctOtherCWithin)
summary(pr$divMig)

pr$ShareOver100<-pr$GospOver100/pr$GospodRolne  
pr$shareUrb1950<-pr$totUrb1950/pr$totpop1950
pr$ShareInd1939<-pr$LudnoscPrzemysl/pr$LudnoscOgolem 

pr$TotCrime<-pr$Ogolem/pr$LudnoscTys 
pr$Theft<-pr$KradziezAll/pr$LudnoscTys   
pr$Robbery<-pr$Rozboj/pr$LudnoscTys   
pr$Hooliganism<-pr$Chuliganstwo/pr$LudnoscTys   

## Regression analysis: 
regAllCrimes<-lm(TotCrime ~ divMig+ shareResettled+ shareUrb1950 + ShareInd1939 + Border + ShareOver100 +Woj., data=pr)#
regTheft<-lm(Theft ~ divMig+ shareResettled+ shareUrb1950 + ShareInd1939 + Border + ShareOver100+Woj., data=pr)
regRobbery<-lm(Robbery ~ divMig+ shareResettled+ shareUrb1950 + ShareInd1939 + Border + ShareOver100+Woj., data=pr)##
regHoolig<-lm(Hooliganism ~ divMig+ shareResettled+ shareUrb1950 + ShareInd1939 + Border + ShareOver100+Woj., data=pr)##

stargazer(regAllCrimes, regTheft, regRobbery, regHoolig, style="APSR", digits=2,  omit=c("Woj.", "Constant"), omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes","Yes", "Yes","Yes"), c("District FE",  "Yes", "Yes", "Yes", "Yes")), column.labels=c("All Crimes", "Theft", "Robbery", "Hooliganism"), dep.var.labels.include=FALSE, star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)

